#' ---
#' author: "```matthew.pietryka@gmail.com```"
#' date: "2019-08-08"
#' output:
#'  html_document:
#'    warning: false
#'    toc: true
#'    code_folding: hide
#' ---


#' **PURPOSE:**
#' This script creates alluvial plots showing the proportion each document borrowed (Figure 1).



#  load packages  ----------------
library(tidyverse)   # DATA CLEANING FUNCTIONS
library(stringr)     # STRING FUNCTIONS
library(ggrepel)
library(lubridate)   # DATE/TIME FUNCTIONS


#  load data ----------------

# ALL PATHS BELOW ARE RELATIVE TO THE WORKING DIRECTORY, WHICH MUST BE THE
# FOLDER CONTAINING 'SC-Plots-Preferences.R'.

# 'dyads_df' OBJECT CREATED IN '1-clean-the-data/SC-1- Dyadic Data.R'
dyads_df <- read_rds("../Data/Derived/dyads_df.rds")
# PRINT THE "source" ATTRIBUTE STORED ON THE OBJECT (NULL IF IT WAS NEVER SET)
attr(dyads_df, "source")

# SOURCE (source): INNOVATIVE (new)
# FOCAL (source_to):    FULL TEXT
new_to_full_df <- read_rds("../Data/Derived/new_to_full_df.rds")


# SHARED PLOT SETTINGS. NOTE(review): PRESUMABLY THIS IS WHERE theme_sc() AND
# label_color (USED BY THE PLOTS BELOW) ARE DEFINED -- CONFIRM.
source("SC-Plots-Preferences.R")




# DESCRIPTIVE STATS FOR TOWN CHARTERS --------
# LIST label_to / label_from / ratio FOR DYADS WHERE date_to IS LATER THAN
# date_from AND year_to IS BEFORE 1776.
# NO `by` IS GIVEN, SO THE JOIN USES EVERY COLUMN NAME THE TWO TABLES SHARE.
new_to_full_df %>%
  left_join(dyads_df) %>%
  filter(
    date_to > date_from,  # FOCUS ON RELEVANT YEARS
    year_to < 1776
  ) %>%
  select(label_to, label_from, ratio)



# DESCRIPTIVE STATS FOR ORIGINAL 13 --------
## MINUS THE TWO STATES THAT USED TOWN CHARTERS (Connecticut and Rhode Island)

# SUMMARY STATISTICS OF THE PROPORTION BORROWED FOR FOCAL DOCUMENTS FLAGGED
# first_13_to, 1776 ONWARD. ONE OUTPUT ROW: n, mn, med, sd, min, max.
new_to_full_df %>%
  left_join(dyads_df) %>%
  filter(
    date_to > date_from,   # FOCUS ON RELEVANT YEARS
    first_13_to == TRUE,   # KEEP ONLY FOCAL DOCUMENTS FLAGGED AS FIRST 13
    year_to >= 1776
  ) %>%
  # TOTAL RATIO PER FOCAL DOCUMENT (`to`) AND same_state VALUE
  group_by(document_id = to, same_state) %>%
  summarise(prop_borrowed = sum(ratio)) %>%
  ungroup() %>%
  arrange(prop_borrowed) %>%
  # n = NUMBER OF document_id x same_state ROWS ENTERING THE SUMMARY
  mutate(n = n()) %>%
  group_by(n) %>%
  summarise(
    mn = mean(prop_borrowed),
    med = median(prop_borrowed),
    sd = sd(prop_borrowed),
    min = min(prop_borrowed),
    max = max(prop_borrowed)
  ) %>%
  ungroup() %>%
  mutate_if(is.double, function(x) round(x, 2))


# DESCRIPTIVE STATS EXCLUDING ORIGINAL 13 --------


# SUMMARY STATISTICS OF THE PROPORTION BORROWED FOR FOCAL DOCUMENTS NOT
# FLAGGED first_13_to. ONE OUTPUT ROW PER same_state VALUE:
# same_state, n, mn, med, sd, min, max.
new_to_full_df %>%
  left_join(dyads_df) %>%
  filter(
    date_to > date_from,   # FOCUS ON RELEVANT YEARS
    first_13_to == FALSE   # FOCUS ON DOCUMENTS AFTER THE ORIGINAL 13
  ) %>%
  # TOTAL RATIO PER FOCAL DOCUMENT (`to`) AND same_state VALUE
  group_by(document_id = to, same_state) %>%
  summarise(prop_borrowed = sum(ratio)) %>%
  # n = NUMBER OF DOCUMENTS CONTRIBUTING TO EACH same_state GROUP
  group_by(same_state) %>%
  summarise(
    n = n(),
    mn = mean(prop_borrowed),
    med = median(prop_borrowed),
    sd = sd(prop_borrowed),
    min = min(prop_borrowed),
    max = max(prop_borrowed)
  ) %>%
  ungroup() %>%
  mutate_if(is.double, function(x) round(x, 2))



# CREATE PLOTTING DATA  ------



# ONE ROW PER CROSS-STATE DYAD FROM 1776 ONWARD, WITH DISPLAY LABELS AND THE
# TOTAL RATIO EACH FOCAL DOCUMENT (label_to) DRAWS FROM OTHER STATES.
allyears_df <- new_to_full_df %>%
  left_join(dyads_df) %>%
  filter(
    date_to > date_from,               # FOCUS ON RELEVANT YEARS
    state_code_to != state_code_from,  # EXCLUDE DYADS FROM SAME STATE
    year_to >= 1776                    # POST-1776
  ) %>%
  # STATE LABELS ("<year> <state name>")
  mutate(
    from_lab = paste(year_from, state_full_from),
    to_lab = paste(year_to, state_full_to)
  ) %>%
  # TOTAL BORROWED FROM OTHER STATES
  group_by(label_to) %>%
  mutate(to_ratio_sum = sum(ratio)) %>%
  ungroup() %>%
  select(
    state_code_to, state_code_from,
    from_lab, date_from, to_lab, date_to, ratio, to_ratio_sum
  )

# DATE THIS SCRIPT USES TO SPLIT THE PRE-WAR AND POST-WAR PERIODS
civil_war_start <- ymd("1861-04-12")

# KEEP ONLY THE DYADS WHOSE SOURCE IS AMONG THE MOST-BORROWED SOURCES IN A PERIOD.
#
# era_df:    ROWS OF allyears_df ALREADY RESTRICTED TO ONE PERIOD
# n_sources: HOW MANY TOP SOURCES TO KEEP (top_n() KEEPS TIES, SO MORE MAY RETURN)
#
# RETURNS from_lab, from_ratio_sum (TOTAL RATIO LENT BY THAT SOURCE WITHIN THE
# PERIOD) AND THE REMAINING era_df COLUMNS; from_lab BECOMES A FACTOR ORDERED
# BY from_ratio_sum SO THE PLOT STACKS AND LEGEND FOLLOW THAT ORDER.
top_sources <- function(era_df, n_sources = 5) {
  era_df %>%
    group_by(from_lab) %>%
    summarise(from_ratio_sum = sum(ratio)) %>%
    top_n(n_sources, from_ratio_sum) %>%
    # from_lab IS THE ONLY SHARED COLUMN; NAMING IT MAKES THE KEY EXPLICIT
    left_join(era_df, by = "from_lab") %>%
    mutate(from_lab = as.factor(from_lab) %>% fct_reorder(from_ratio_sum))
}

# BEFORE CIVIL WAR
prewar_df <- allyears_df %>%
  filter(date_to < civil_war_start) %>%
  top_sources()

# POST-CIVIL WAR
postwar_df <- allyears_df %>%
  filter(date_to >= civil_war_start) %>%
  top_sources()




#' # PRE-WAR PLOT
# PRE WAR PLOT ---------------

# STACKED AREAS: ratio BORROWED FROM EACH TOP SOURCE (from_lab) PER FOCAL
# CONSTITUTION (to_lab). DOTTED LINE: to_ratio_sum FOR THE SAME FOCAL DOCUMENT.
prewar_plot <- ggplot(prewar_df, aes(x = to_lab)) +
  geom_line(aes(group = 1L, y = to_ratio_sum), linetype = "dotted") +
  geom_area(aes(group = from_lab, fill = from_lab, y = ratio)) +
  # LABEL + ARROW POINTING AT THE DOTTED LINE; POSITIONS ARE HARD-CODED IN
  # DISCRETE-AXIS UNITS, SO THEY MUST BE REVISITED IF THE SET OF to_lab CHANGES
  annotate(
    geom = "text", x = 14, y = 0.47,
    color = label_color,
    label = "Proportion of text first used\nin any previous constitution"
  ) +
  # `size` KEPT FOR OLDER ggplot2; ggplot2 >= 3.4.0 PREFERS `linewidth` FOR LINES
  annotate(
    "segment", x = 20.1, xend = 24.8, y = .47, yend = .41,
    color = label_color,
    size = 1.2,
    arrow = arrow(length = unit(0.3, "cm"), type = "open")
  ) +
  xlab("Focal Constitution") +
  ylab("Proportion Borrowed") +
  theme_sc() +
  ggtitle(NULL) +
  scale_fill_ipsum() +
  scale_color_ipsum() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.position = "bottom"
  ) +
  guides(
    # "none" REPLACES THE DEPRECATED `FALSE` FOR SUPPRESSING A LEGEND
    color = "none",
    fill = guide_legend(title = "Source Constitution", title.position = "left")
  )

# OPEN A FRESH 10 x 5 INCH SCREEN DEVICE. dev.new() PICKS THE PLATFORM'S DEFAULT
# DEVICE, UNLIKE windows(), WHICH EXISTS ONLY ON WINDOWS BUILDS OF R.
graphics.off()
dev.new(width = 10, height = 5, noRStudioGD = TRUE)
prewar_plot

#' ## DISPLAY TOTAL PROPORTION BORROWED
# ONE ROW PER FOCAL CONSTITUTION WITH ITS TOTAL, ROUNDED TO 2 DECIMALS
prewar_df %>%
  distinct(to_lab, to_ratio_sum) %>%
  mutate_if(is.double, function(x) round(x, 2)) %>%
  print(n = 1000)

#' ## DISPLAY PROPORTION BORROWED FROM TOP FIVE SOURCES
# ONE ROW PER FOCAL/SOURCE PAIR WITH ITS RATIO, ROUNDED TO 2 DECIMALS
prewar_df %>%
  distinct(to_lab, from_lab, ratio) %>%
  mutate_if(is.double, function(x) round(x, 2)) %>%
  print(n = 1000)

#' # POST-WAR PLOT
# POST WAR PLOT ---------------

# STACKED AREAS: ratio BORROWED FROM EACH TOP SOURCE (from_lab) PER FOCAL
# CONSTITUTION (to_lab). DOTTED LINE: to_ratio_sum FOR THE SAME FOCAL DOCUMENT.
postwar_plot <- ggplot(postwar_df, aes(x = to_lab)) +
  geom_line(aes(group = 1L, y = to_ratio_sum), linetype = "dotted") +
  geom_area(aes(group = from_lab, color = NULL, fill = from_lab, y = ratio)) +
  xlab("Focal Constitution") +
  ylab("Proportion Borrowed") +
  theme_sc() +
  ggtitle(NULL) +
  scale_color_brewer(palette = "Set1") +
  scale_fill_brewer(palette = "Set1") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.position = "bottom"
  ) +
  guides(
    # "none" REPLACES THE DEPRECATED `FALSE` FOR SUPPRESSING A LEGEND
    color = "none",
    fill = guide_legend(title = "Source Constitution", title.position = "left")
  )

# OPEN A FRESH 10 x 5 INCH SCREEN DEVICE. dev.new() PICKS THE PLATFORM'S DEFAULT
# DEVICE, UNLIKE windows(), WHICH EXISTS ONLY ON WINDOWS BUILDS OF R.
graphics.off()
dev.new(width = 10, height = 5, noRStudioGD = TRUE)
postwar_plot

#' ## DISPLAY TOTAL PROPORTION BORROWED
# ONE ROW PER FOCAL CONSTITUTION WITH ITS TOTAL, ROUNDED TO 2 DECIMALS
postwar_df %>%
  distinct(to_lab, to_ratio_sum) %>%
  mutate_if(is.double, function(x) round(x, 2)) %>%
  print(n = 1000)

#' ## DISPLAY PROPORTION BORROWED FROM TOP FIVE SOURCES
# ONE ROW PER FOCAL/SOURCE PAIR WITH ITS RATIO, ROUNDED TO 2 DECIMALS
postwar_df %>%
  distinct(to_lab, from_lab, ratio) %>%
  mutate_if(is.double, function(x) round(x, 2)) %>%
  print(n = 1000)



# PUT PLOTS ON SAME Y SCALE ------------

# LIMITS
# Y RANGE OF THE FIRST PANEL OF A BUILT PLOT, AS A LENGTH-2 VECTOR (LOW, HIGH).
# READS ggplot_build() INTERNALS, SO IT MAY NEED UPDATING WITH ggplot2.
panel_y_range <- function(p) {
  built <- ggplot_build(p)
  built$layout$panel_scales_y[[1]]$range$range
}

prewar_y <- panel_y_range(prewar_plot)
postwar_y <- panel_y_range(postwar_plot)

# SMALLEST LOWER BOUND AND LARGEST UPPER BOUND ACROSS THE TWO PLOTS
lowest_y <- min(prewar_y[[1]], postwar_y[[1]])
highest_y <- max(prewar_y[[2]], postwar_y[[2]])
shared_y <- c(lowest_y, highest_y)


# SAVE ---------------

# DISPLAY A PLOT ON THE SHARED Y SCALE AND WRITE IT TO DISK.
#
# plot:     A ggplot OBJECT
# filename: OUTPUT PATH PASSED TO ggsave()
# y_limits: VALUES THE Y AXIS MUST INCLUDE (DEFAULTS TO shared_y)
#
# THE PLOT AND ITS 10 x 5 INCH SIZE ARE PASSED TO ggsave() EXPLICITLY, SO THE
# SAVED FILE NO LONGER DEPENDS ON last_plot() OR ON THE OPEN DEVICE'S SIZE.
save_area_plot <- function(plot, filename, y_limits = shared_y) {
  plot <- plot + expand_limits(y = y_limits)
  print(plot)
  ggsave(filename, plot = plot, width = 10, height = 5)
}

# FRESH 10 x 5 INCH SCREEN DEVICE FOR THE PREVIEWS. dev.new() PICKS THE
# PLATFORM'S DEFAULT DEVICE, UNLIKE windows(), WHICH EXISTS ONLY ON WINDOWS.
graphics.off()
dev.new(width = 10, height = 5, noRStudioGD = TRUE)

# GREYSCALE VERSIONS REPLACE THE FILL/COLOUR SCALES BEFORE SAVING
save_area_plot(
  prewar_plot + scale_fill_grey() + scale_color_grey(),
  "Plots/area_prewar_grey.png"
)
save_area_plot(prewar_plot, "Plots/area_prewar.png")

save_area_plot(
  postwar_plot + scale_fill_grey() + scale_color_grey(),
  "Plots/area_postwar_grey.png"
)
save_area_plot(postwar_plot, "Plots/area_postwar.png")




#  Session Info ------
#' # Display session information:
# PRINT THE R VERSION, PLATFORM, AND ATTACHED/LOADED PACKAGE VERSIONS
sessionInfo()
